# This file is not intended for general use. Its purpose is to dump the .obo files that define
# the cvParam accession fields into a dependency-free format that can be bundled with pyimzml.
#
# It requires the additional pip dependency obonet==0.2.6
import re
from collections import defaultdict
from datetime import datetime
from pprint import pformat


# Ontologies to dump. Each entry is a 3-tuple:
#   (name of the generated module, location of the .obo file, node-ID prefixes to keep)
# NOTE(review): the 'ms' and 'uo' URLs point at a `master` ref while 'ims' points at a
# specific commit hash, so re-running the dump may pick up upstream changes for the first two.
ontology_sources = [
    (
        'ms',
        'https://raw.githubusercontent.com/HUPO-PSI/psi-ms-CV/master/psi-ms.obo',
        ['MS'],
    ),
    (
        'uo',
        'https://raw.githubusercontent.com/bio-ontology-research-group/unit-ontology/master/unit.obo',
        ['UO'],
    ),
    (
        'ims',
        'https://raw.githubusercontent.com/imzML/imzML/f2c8b6ce2affa8d8eef74d4bfe5922c815ff4dff/imagingMS.obo',
        ['IMS'],
    ),
]

# Matches an xref that starts with `value-type:xsd\:<type> ` and captures <type>.
# The pattern requires a literal backslash before the colon and a space after the type name.
_VALUE_TYPE_RE = re.compile(r'^value-type:xsd\\:(\w+) ')


def _value_type(xrefs):
    """Return 'xsd:<type>' taken from the first value-type xref in `xrefs`, or None if none match."""
    for xref in xrefs:
        m = _VALUE_TYPE_RE.match(xref)
        if m:
            return 'xsd:' + m[1]
    return None


def _collect_terms(graph, namespaces):
    """Build {node_id: (name, dtype)} for every named node whose ID starts with one of `namespaces`.

    `graph` only needs a `.nodes` mapping of node ID -> attribute dict. Nodes without a
    'name' attribute are skipped; `dtype` is None when the node has no value-type xref.
    """
    prefixes = tuple(namespaces)  # str.startswith accepts a tuple of alternatives
    terms = {}
    for node_id in graph.nodes:
        node = graph.nodes[node_id]
        if node_id.startswith(prefixes) and 'name' in node:
            terms[node_id] = (node['name'], _value_type(node.get('xref', [])))
    return terms


def _write_terms_module(path, terms, timestamp):
    """Write `terms` to `path` as a Python module that defines a single `terms` dict."""
    terms_repr = pformat(terms, indent=4, width=100)
    # pformat() leaves printable non-ASCII characters unescaped, and Python reads source files
    # as UTF-8 by default, so write UTF-8 explicitly rather than the platform locale encoding.
    with open(path, 'wt', encoding='utf-8') as f:
        f.write('# DO NOT EDIT BY HAND\n')
        f.write(f'# This file was autogenerated by dump_obo_files.py at {timestamp}\n')
        f.write(f'terms = {terms_repr}\n')


if __name__ == '__main__':
    from datetime import timezone

    import obonet

    # datetime.utcnow() is deprecated since Python 3.12. Take an aware UTC time and drop the
    # tzinfo so the timestamp text keeps the same offset-less ISO format as before.
    now = datetime.now(timezone.utc).replace(tzinfo=None).isoformat()

    for ontology_name, src, namespaces in ontology_sources:
        print(f'Parsing {ontology_name}')
        # ignore_obsolete=False is passed so obsolete terms are not filtered out by obonet.
        graph = obonet.read_obo(src, ignore_obsolete=False)
        terms = _collect_terms(graph, namespaces)
        _write_terms_module(f'./{ontology_name}.py', terms, now)


